* Build the list of distinct medical school names from physician compare

* Only the school name is used below, so load just that variable
use med_sch using "${intermediate_data}/physician_compare/physician_compare_cleaned.dta", clear

drop if missing(med_sch)

* Lower-case before de-duplicating so names differing only by case become one row
replace med_sch = lower(med_sch)

duplicates drop

* Numeric id for each distinct name (passed to matchit as idusing later on)
encode med_sch, gen(med_sch_id)

* Held in a tempfile so both ranking sources can match against the same list
tempfile _compare
save "`_compare'"

* Add NIH medical school ranking data
*
* For every year 2005-2017, import each csv file found in that year's raw NIH
* grants folder, rank organizations by funding (rank 1 = largest funding) and
* save one intermediate dataset per csv file.

forv y = 2005/2017 {
	local files_list : dir "${raw_data}/NIH_med_schl_grants/`y'" files "*.csv"
	
	foreach x of local files_list {
		
		import delimited `"${raw_data}/NIH_med_schl_grants/`y'/`x'"', encoding(UTF-8) clear
	
		* capture lets the loop continue when the rename fails; if the expected
		* columns are absent the commands below stop on the missing variable
		cap rename (organization funding) (med_sch_rankgnih fundingnih)
				
		* Strip currency formatting so funding can be sorted numerically
		destring fundingnih, ignore("$" ",") replace
		
		* destring only prints a note and leaves a string behind when other
		* nonnumeric characters remain; stop here rather than rank by string order
		confirm numeric variable fundingnih
		
		* Descending funding; the name breaks ties so ranks are reproducible across runs
		gsort -fundingnih med_sch_rankgnih
		
		gen nihrank = _n
				 
		keep med_sch_rankgnih nihrank
		
		gen year = `y'
				
		compress

		save "${intermediate_data}/medical_school_ranking/intermediate/`y'_`x'.dta", replace
	}
}

* Stack every intermediate NIH dataset into one long dataset

* Start from an empty dataset so append only brings in the intermediate files
clear

forvalues yr = 2005/2017 {
	* Same directory listing as the import step, so each csv maps to its saved dta
	local csv_names : dir "${raw_data}/NIH_med_schl_grants/`yr'" files "*.csv"

	foreach csv of local csv_names {
		append using "${intermediate_data}/medical_school_ranking/intermediate/`yr'_`csv'.dta"
	}
}

* Normalize NIH organization names to lower case
replace med_sch_rankgnih = lower(med_sch_rankgnih)

* Exclude specific organizations by name
* NOTE(review): presumably excluded because they are not medical schools - confirm
drop if inlist(med_sch_rankgnih, ///
	"harvard school of public health", ///
	"massachusetts institute of technology", ///
	"university of tx md anderson can ctr", ///
	"ut md anderson cancer ctr", ///
	"university of texas md anderson can ctr")

* Keep ranks 1 through 40 only (ranks were assigned before the exclusions above)
drop if !inrange(nihrank, 1, 40)

* One row per organization, one nihrank column per year
reshape wide nihrank, i(med_sch_rankgnih) j(year)

sort nihrank2017

* Saved before the id is encoded; merged back after the name matching
tempfile _nihranking
save "`_nihranking'"

* Fuzzy-match NIH organization names to the physician compare school names

* Numeric id passed to matchit as the master id
encode med_sch_rankgnih, gen(med_sch_id_rankgnih)

matchit med_sch_id_rankgnih med_sch_rankgnih using "`_compare'", idusing(med_sch_id) txtusing(med_sch) override

* Manual overrides: for each NIH name below, discard every candidate except the chosen school
drop if med_sch_rankgnih == "boston university medical campus" & med_sch != "boston university school of medicine"
drop if med_sch_rankgnih == "columbia university health sciences" & med_sch != "columbia university college of physicians and surgeons"
drop if med_sch_rankgnih == "emory university" & med_sch != "emory university school of medicine"
drop if med_sch_rankgnih == "icahn school of medicine at mount sinai" & med_sch != "mount sinai school of medicine of city university of new york"
drop if med_sch_rankgnih == "ohio state university" & med_sch != "ohio state university college of medicine"
drop if med_sch_rankgnih == "university of alabama at birmingham" & med_sch != "university of alabama school of medicine"
drop if med_sch_rankgnih == "university of california los angeles" & med_sch != "university of california, ucla school of medicine"
drop if med_sch_rankgnih == "university of colorado denver" & med_sch != "university of colorado school of medicine"
drop if med_sch_rankgnih == "university of illinois" & med_sch != "university of illinois at chicago health science center"
drop if med_sch_rankgnih == "university of minnesota twin cities" & med_sch != "university of minnesota medical school"

* Put the highest-scoring candidate first within each NIH name, then keep that one
gsort med_sch_rankgnih -similscore
collapse (first) med_sch, by(med_sch_rankgnih)

* Upper case to agree with the merge key used when the rankings are combined
replace med_sch = upper(med_sch)

* Every NIH name must have found a match, otherwise the assert stops the run
merge 1:1 med_sch_rankgnih using "`_nihranking'", assert(matched) nogen

* Several NIH names can map to one school; fold them into a single row
* NOTE(review): if two such names are both ranked in the same year their ranks are added together - confirm this cannot occur
collapse (first) med_sch_rankgnih (sum) nihrank*, by(med_sch)

* collapse (sum) returns 0 when every input is missing; turn those back into missing
mvdecode nihrank*, mv(0)

tempfile _nihranking_merge
save "`_nihranking_merge'"

* Add U.S. News and World Report medical school ranking data

import delimited "${raw_data}/medical_school_ranking/hist_medical_school_rank.csv", varnames(1) clear 

* The name column is renamed under either of two spellings; capture ignores whichever one is absent
* NOTE(review): the second spelling presumably comes from a byte-order mark read into the header - confirm
cap rename medical_school_name med_sch_rankgnews
cap rename ïmedical_school_name med_sch_rankgnews

* Prefix the rank columns so they stay distinct from the NIH rank columns
rename rank* newsrank*

* Full ranking table, merged back after the name matching
tempfile _newsranking
save "`_newsranking'"

* Numeric id passed to matchit as the master id
encode med_sch_rankgnews, gen(med_sch_id_rankgnews)

* Only the name and its id are needed for matching
keep *med*

matchit med_sch_id_rankgnews med_sch_rankgnews using "`_compare'", idusing(med_sch_id) txtusing(med_sch) override

* Manual overrides: for each U.S. News name below, discard every candidate except the chosen school
* NOTE(review): names are not lower-cased here as they are for NIH; these comparisons assume the csv is already lower case - confirm
drop if med_sch_rankgnews == "emory university" & med_sch != "emory university school of medicine"
drop if med_sch_rankgnews == "ohio state university" & med_sch != "ohio state university college of medicine"
drop if med_sch_rankgnews == "university of alabama birmingham" & med_sch != "university of alabama school of medicine"
drop if med_sch_rankgnews == "university of california los angeles" & med_sch != "university of california, ucla school of medicine"
drop if med_sch_rankgnews == "university of colorado health services center" & med_sch != "university of colorado school of medicine"
drop if med_sch_rankgnews == "university of illinois" & med_sch != "university of illinois at chicago health science center"
drop if med_sch_rankgnews == "university of minnesota twin cities" & med_sch != "university of minnesota medical school"

* Put the highest-scoring candidate first within each U.S. News name, then keep that one
gsort med_sch_rankgnews -similscore
collapse (first) med_sch, by(med_sch_rankgnews)

* Upper case to agree with the merge key used when the rankings are combined
replace med_sch = upper(med_sch)

* Every U.S. News name must have found a match, otherwise the assert stops the run
merge 1:1 med_sch_rankgnews using "`_newsranking'", assert(matched) nogen

tempfile _newsranking_merge
save "`_newsranking_merge'"

* Combine ranking information

use "`_compare'", clear

* Both ranking files carry med_sch in upper case
replace med_sch = upper(med_sch)

* Schools without a ranking stay as master-only rows; ranking rows with no school stop the run
merge 1:1 med_sch using "`_newsranking_merge'", assert(master matched) nogen
merge 1:1 med_sch using "`_nihranking_merge'", assert(master matched) nogen

* Drop schools that appear in neither ranking source
drop if mi(med_sch_rankgnews) & mi(med_sch_rankgnih)

* Same variable list drives both the selection and the column order
local out_vars med_sch med_sch_rankgnews med_sch_rankgnih newsrank* nihrank*
keep `out_vars'
order `out_vars'

compress

* One row per school is required in the output
gisid med_sch
sort med_sch

save "${intermediate_data}/medical_school_ranking/medical_school_ranks.dta", replace
